package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object Demo5Submit {

  /**
    * Word-count job intended for cluster submission: reads a tab-separated
    * text file from HDFS, splits each line on commas into words, counts the
    * occurrences of each word, and writes the result back to HDFS as CSV.
    *
    * Note: no .master(...) is set here on purpose — the master is supplied
    * by spark-submit at launch time.
    */
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession
      .builder()
      .appName("submit")
      .getOrCreate()

    // Import implicit conversions (enables the $"col" syntax below)
    import spark.implicits._
    // Import all built-in Spark SQL functions (split, explode, count, ...)
    import org.apache.spark.sql.functions._

    // Each input line is read as a single STRING column named "lines";
    // the tab separator ensures the whole line lands in that one column.
    val linesDF: DataFrame = spark
      .read
      .format("csv")
      .option("sep", "\t")
      .schema("lines STRING")
      .load("/data/wordcount.txt") // HDFS path

    // Count word occurrences: split each line on commas, explode into one
    // row per word, then group and count.
    val wordCountDF: DataFrame = linesDF
      .select(explode(split($"lines", ",")) as "word")
      .groupBy($"word")
      .agg(count($"word") as "c")

    // Save the result, overwriting any previous output at the same path.
    wordCountDF
      .write
      .format("csv")
      .option("sep", ",")
      .mode(SaveMode.Overwrite)
      .save("/data/wc1")

    // Stop the session so the application releases its cluster resources
    // and the driver JVM can exit cleanly.
    spark.stop()

    /**
      * Run from the directory containing the jar.
      * Note: `--master yarn-client` was deprecated in Spark 2.0 and removed
      * in later versions; use the deploy-mode flag instead:
      *
      * spark-submit --class com.shujia.spark.sql.Demo5Submit --master yarn --deploy-mode client spark-1.0.jar
      */

  }

}
